# -*- coding: utf-8 -*-
import pandas as pd
import numpy as np
import time
import sys
from pathlib import Path
import unicodedata
import xml.etree.ElementTree as ET
import csv
import requests
import zipfile
import io
from collections import defaultdict
import multiprocessing

# ==============================================================================
# --- 1. KOPĒJĀ KONFIGURĀCIJA ---
# ==============================================================================
# Progress message; executes whenever this module is imported or run.
print("Inicializējam skriptu un konfigurāciju...")

# --- INPUT FILE NAMES (the files are written in part 1 of main()) ---
# Each path is passed as the "output" of one download task and then read back
# by the analysis part.
file_5mega = Path('5_Būves_raksturojošie_dati.csv')
file_6mega = Path('6_Telpu_grupu_raksturojošie_dati.csv')
file_7mega = Path('7_Kadastra_objektam_reģistrētās_adreses.csv')
file_10mega = Path('10_Objekta_novērtējums_un_kadastrālās_vērtības.csv')

# --- FINAL OUTPUT FILE (written at the end of part 2 of main()) ---
final_output_file = Path('out-summ-level.csv')

# --- PARAMETERS AND CONSTANTS FOR THE DATA ANALYSIS ---
# Only premise groups whose 'kods' is in this list are analysed.
target_kods = [1110, 1121, 1122]
# Numeric score of each individual level, used for the per-building average.
level_to_score_map = {'A': 5, 'B': 4, 'C': 3, 'D': 2, 'E': 1}
# Population per place, one "<place name><TAB><count>" record per line.
# parse_population_data() splits every line on the tab character, so the
# separators below must stay real tabs (not spaces).
population_data_raw = """
Rīga	605273
Daugavpils	77799
Jelgava	54701
Jēkabpils	21150
Jūrmala	52154
Liepāja	66680
Ogre	22767
Rēzekne	26131
Valmiera	22376
Ventspils	32634
Aizkraukles novads	28618
Aizkraukle	6853
Jaunjelgava	1713
Koknese	2427
Pļaviņas	2823
Alūksnes novads	13059
Alūksne	6175
Augšdaugavas novads	24361
Ilūkste	2082
Subate	549
Ādažu novads	23281
Ādaži	7535
Balvu novads	17910
Balvi	5584
Viļaka	1172
Bauskas novads	40906
Bauska	9811
Iecava	5343
Cēsu novads	40943
Cēsis	14699
Līgatne	1009
Dienvidkurzemes novads	32708
Aizpute	3892
Durbe	483
Grobiņa	3593
Pāvilosta	851
Priekule	1810
Dobeles novads	27474
Auce	2136
Dobele	8589
Gulbenes novads	18740
Gulbene	6715
Jelgavas novads	32053
Jēkabpils novads	39276
Aknīste	917
Viesīte	1510
Krāslavas novads	19833
Dagda	1805
Krāslava	6854
Kuldīgas novads	26956
Kuldīga	9940
Skrunda	1767
Ķekavas novads	31303
Baldone	3711
Baloži	6846
Ķekava	5039
Limbažu novads	27852
Ainaži	644
Aloja	1060
Limbaži	6613
Salacgrīva	2480
Staicele	766
Līvānu novads	10215
Līvāni	6790
Ludzas novads	20745
Kārsava	1843
Ludza	7524
Zilupe	1271
Madonas novads	27255
Cesvaine	1210
Lubāna	1453
Madona	6561
Mārupes novads	37025
Mārupe	16544
Ogres novads	57689
Ikšķile	7448
Ķegums	2059
Lielvārde	5853
Olaines novads	20658
Olaine	9908
Preiļu novads	15768
Preiļi	5841
Rēzeknes novads	28305
Viļāni	2749
Ropažu novads	35178
Vangaži	3192
Salaspils novads	23694
Salaspils	17826
Saldus novads	26320
Brocēni	2834
Saldus	9553
Saulkrastu novads	9926
Saulkrasti	3149
Siguldas novads	31469
Sigulda	14632
Smiltenes novads	17697
Ape	777
Smiltene	5129
Talsu novads	34675
Sabile	1369
Stende	1532
Talsi	8649
Valdemārpils	1135
Tukuma novads	43641
Kandava	3276
Tukums	16318
Valkas novads	7501
Valka	4564
Valmieras novads	50283
Mazsalaca	1113
Rūjiena	2650
Seda	1092
Strenči	957
Varakļānu novads	2890
Varakļāni	1653
Ventspils novads	10303
Piltene	821
"""

# ==============================================================================
# --- 2. PALĪGFUNKCIJAS ---
# ==============================================================================
# Progress message emitted at module level, i.e. every time the module is
# imported. NOTE(review): with a multiprocessing start method that re-imports
# the main module in workers, this would presumably print once per worker —
# confirm whether that is acceptable.
print("Definējam visas nepieciešamās palīgfunkcijas...")

# --- PALĪGFUNKCIJAS DATU IEGŪŠANAI (1. DAĻA) ---
def _parse_building_xml(xml_content_bytes):
    data_rows = []
    try:
        root = ET.fromstring(xml_content_bytes)
        for item_data in root.findall(".//BuildingItemData"):
            data_rows.append([
                item_data.findtext(".//ObjectCadastreNr", default="-"),
                item_data.findtext(".//BuildingCadastreNr", default="-"),
                item_data.findtext(".//BuildingName", default="-"),
                item_data.findtext(".//BuildingUseKindId", default="-"),
                item_data.findtext(".//BuildingUseKindName", default="-"),
                item_data.findtext(".//BuildingExploitYear", default="-"),
                item_data.findtext(".//BuildingKindId", default="-"),
                item_data.findtext(".//TotalArea", default="-"),
                item_data.findtext(".//FlatTotalArea", default="-"),
                item_data.findtext(".//LivingArea", default="-")
            ])
    except ET.ParseError: pass
    return data_rows

def process_buildings(url, output_file):
    """Download the building archive and write its records to a CSV file.

    The ZIP archive at ``url`` is fetched into memory, every ``.xml`` member
    is parsed by ``_parse_building_xml`` in a process pool, and the combined
    rows are written to ``output_file`` sorted by their first two columns.

    Raises:
        requests.HTTPError: when the server answers with an error status.
    """
    print(f"Lejupielādēju: {output_file}...")
    reply = requests.get(url, timeout=300)
    reply.raise_for_status()
    print("Lejupielāde pabeigta. Sāku paralēlu apstrādi...")

    # Read every XML member of the in-memory archive.
    xml_documents = []
    with zipfile.ZipFile(io.BytesIO(reply.content)) as archive:
        for member in archive.namelist():
            if member.endswith('.xml'):
                xml_documents.append(archive.read(member))

    with multiprocessing.Pool() as workers:
        rows_per_document = workers.map(_parse_building_xml, xml_documents)

    # Flatten the per-document lists, then order by the two cadastre columns.
    rows = []
    for document_rows in rows_per_document:
        rows.extend(document_rows)
    rows.sort(key=lambda record: (record[0], record[1]))

    header = ['kadastrs', 'kadastra_telpa', 'buve', 'kods', 'telpa', 'gads', 'apakskods', 'pilna_platiba', 'ekas_platiba', 'dzivojama_platiba']
    with open(output_file, 'w', newline='', encoding='utf-8') as out:
        csv_out = csv.writer(out)
        csv_out.writerow(header)
        csv_out.writerows(rows)

def _parse_premise_xml(xml_content_bytes):
    data_rows = []
    try:
        root = ET.fromstring(xml_content_bytes)
        for item_data in root.findall(".//PremiseGroupItemData"):
            data_rows.append([
                item_data.findtext(".//ObjectRelation/ObjectCadastreNr", ""),
                item_data.findtext(".//PremiseGroupCadastreNr", ""),
                item_data.findtext(".//PremiseGroupUseKindId", ""),
                item_data.findtext(".//PremiseGroupName", ""),
                item_data.findtext(".//PremiseGroupUseKindName", ""),
                item_data.findtext(".//PremiseGroupArea", "")
            ])
    except ET.ParseError: pass
    return data_rows

def process_premise_groups(url, output_file):
    """Download the premise-group archive and write its records to a CSV file.

    The ZIP archive at ``url`` is fetched into memory, every ``.xml`` member
    is parsed by ``_parse_premise_xml`` in a process pool, and the combined
    rows are written to ``output_file`` sorted by their first two columns.

    Raises:
        requests.HTTPError: when the server answers with an error status.
    """
    print(f"Lejupielādēju: {output_file}...")
    reply = requests.get(url, timeout=300)
    reply.raise_for_status()
    print("Lejupielāde pabeigta. Sāku paralēlu apstrādi...")

    # Read every XML member of the in-memory archive.
    with zipfile.ZipFile(io.BytesIO(reply.content)) as archive:
        xml_members = filter(lambda member: member.endswith('.xml'), archive.namelist())
        xml_documents = [archive.read(member) for member in xml_members]

    with multiprocessing.Pool() as workers:
        rows_per_document = workers.map(_parse_premise_xml, xml_documents)

    # Flatten the per-document lists, then order by the two cadastre columns.
    rows = []
    for document_rows in rows_per_document:
        rows += document_rows
    rows.sort(key=lambda record: (record[0], record[1]))

    header = ['kadastrs', 'kadastra_telpa', 'kods', 'buve', 'telpa', 'platiba']
    with open(output_file, 'w', newline='', encoding='utf-8') as out:
        csv_out = csv.writer(out)
        csv_out.writerow(header)
        csv_out.writerows(rows)

def _parse_address_xml(xml_content_bytes):
    data_rows = []
    try:
        root = ET.fromstring(xml_content_bytes)
        for item_data in root.findall(".//AddressItemData"):
            data_rows.append([
                item_data.findtext(".//ObjectCadastreNr", default="-"),
                item_data.findtext(".//ObjectType", default="-"),
                item_data.findtext(".//PostIndex", default="-"),
                item_data.findtext(".//Town", default="-"),
                item_data.findtext(".//County", default="-"),
                item_data.findtext(".//Parish", default="-"),
                item_data.findtext(".//Village", default="-"),
                item_data.findtext(".//Street", default="-"),
                item_data.findtext(".//House", default="-")
            ])
    except ET.ParseError: pass
    return data_rows

def process_addresses(url, output_file):
    """Download the address archive and write its records to a CSV file.

    The ZIP archive at ``url`` is fetched into memory, every ``.xml`` member
    is parsed by ``_parse_address_xml`` in a process pool, and the combined
    rows are written to ``output_file`` sorted by their first column.

    Raises:
        requests.HTTPError: when the server answers with an error status.
    """
    print(f"Lejupielādēju: {output_file}...")
    reply = requests.get(url, timeout=300)
    reply.raise_for_status()
    print("Lejupielāde pabeigta. Sāku paralēlu apstrādi...")

    # Read every XML member of the in-memory archive.
    xml_documents = []
    with zipfile.ZipFile(io.BytesIO(reply.content)) as archive:
        for member in archive.namelist():
            if not member.endswith('.xml'):
                continue
            xml_documents.append(archive.read(member))

    with multiprocessing.Pool() as workers:
        rows_per_document = workers.map(_parse_address_xml, xml_documents)

    # Flatten the per-document lists, then order by the cadastre number.
    rows = []
    for document_rows in rows_per_document:
        rows.extend(document_rows)
    rows.sort(key=lambda record: record[0])

    header = ['kadastrs', 'buve', 'pasta_indekss', 'pilseta', 'novads', 'pagasts', 'ciems', 'iela', 'ekas_Nr']
    with open(output_file, 'w', newline='', encoding='utf-8') as out:
        csv_out = csv.writer(out)
        csv_out.writerow(header)
        csv_out.writerows(rows)

def _parse_valuation_xml(xml_content_bytes):
    data_list = []
    try:
        root = ET.fromstring(xml_content_bytes)
        for item_data in root.findall(".//ValuationItemData"):
            kadastrs = item_data.findtext(".//ObjectRelation/ObjectCadastreNr")
            if not kadastrs: continue
            buve = item_data.findtext(".//ObjectRelation/ObjectType", default="-")
            univ_cena, fisk_cena = '-', '-'
            valuation_list = item_data.find(".//ValuationDataList")
            if valuation_list is not None:
                for row_data in valuation_list.findall(".//ValuationRowData"):
                    value_type, cadastral_value = row_data.findtext(".//ValueType"), row_data.findtext(".//ObjectCadastralValue")
                    if value_type == 'univ': univ_cena = cadastral_value
                    elif value_type == 'fisc': fisk_cena = cadastral_value
            data_list.append({'kadastrs': kadastrs, 'buve': buve, 'univ_cena': univ_cena, 'fisk_cena': fisk_cena})
    except ET.ParseError: pass
    return data_list

def process_valuations(url, output_file):
    """Download the valuation archive and write one CSV row per cadastre number.

    The ZIP archive at ``url`` is fetched into memory and every ``.xml``
    member is parsed by ``_parse_valuation_xml`` in a process pool. Records
    sharing a cadastre number are merged: the first ``buve`` that is not
    ``'-'`` is kept, and each price takes the last value that is not ``'-'``.
    The merged records are written to ``output_file`` sorted by cadastre
    number.

    Raises:
        requests.HTTPError: when the server answers with an error status.
    """
    print(f"Lejupielādēju: {output_file}...")
    reply = requests.get(url, timeout=300)
    reply.raise_for_status()
    print("Lejupielāde pabeigta. Sāku paralēlu apstrādi...")

    # Read every XML member of the in-memory archive.
    xml_documents = []
    with zipfile.ZipFile(io.BytesIO(reply.content)) as archive:
        for member in archive.namelist():
            if member.endswith('.xml'):
                xml_documents.append(archive.read(member))

    with multiprocessing.Pool() as workers:
        records_per_document = workers.map(_parse_valuation_xml, xml_documents)

    # Merge all records that belong to the same cadastre number.
    merged = {}
    for document_records in records_per_document:
        for record in document_records:
            key = record['kadastrs']
            if key not in merged:
                merged[key] = {'kadastrs': '-', 'buve': '-', 'univ_cena': '-', 'fisk_cena': '-'}
            entry = merged[key]
            if entry['buve'] == '-':
                entry['buve'] = record['buve']
            for price_field in ('univ_cena', 'fisk_cena'):
                if record[price_field] != '-':
                    entry[price_field] = record[price_field]
            entry['kadastrs'] = key

    ordered_records = list(merged.values())
    ordered_records.sort(key=lambda entry: entry['kadastrs'])

    with open(output_file, 'w', newline='', encoding='utf-8') as out:
        csv_out = csv.DictWriter(out, fieldnames=['kadastrs', 'buve', 'univ_cena', 'fisk_cena'])
        csv_out.writeheader()
        csv_out.writerows(ordered_records)
# --- PALĪGFUNKCIJAS DATU ANALĪZEI (2. DAĻA) ---
def is_invalid(value):
    """Return True when ``value`` is missing, blank or the ``'-'`` placeholder.

    The textual checks are made on ``str(value)`` with surrounding whitespace
    removed.
    """
    if pd.isna(value):
        return True
    return str(value).strip() in ('-', '')

def apply_location_priority(df, pilseta_col='pilseta', novads_col='novads'):
    """Pick the town if present, otherwise the county, for every row of ``df``.

    A cell counts as empty when it is NaN, ``''`` or ``'-'``.

    Returns:
        A ``(pilseta, novads)`` pair of Series indexed like ``df``. A row with
        a town gets the town and ``'-'`` as county; a row with only a county
        gets ``'-'`` as town and the county; all other rows get ``'-'`` twice.
        If either column is absent from ``df`` both Series are all ``'-'``.
    """
    placeholder = '-'
    pilseta_out = pd.Series(placeholder, index=df.index, dtype=str)
    novads_out = pd.Series(placeholder, index=df.index, dtype=str)
    if not (pilseta_col in df.columns and novads_col in df.columns):
        return pilseta_out, novads_out

    def _has_value(column):
        # True where the cell is neither NaN nor one of the empty markers.
        cells = df[column]
        return ~(cells.isna() | cells.isin(['', placeholder]))

    has_town = _has_value(pilseta_col)
    county_only = (~has_town) & _has_value(novads_col)

    pilseta_out[has_town] = df.loc[has_town, pilseta_col]
    novads_out[county_only] = df.loc[county_only, novads_col]
    return pilseta_out, novads_out

def perform_fallback_merge(main_df, df7_lookup, prefix_len, kadastrs_col_source, target_pilseta_col, target_novads_col):
    """Fill still-unknown locations by matching on a cadastre-number prefix.

    Rows of ``main_df`` whose two target columns are both invalid (per
    ``is_invalid``) are looked up in ``df7_lookup`` by the first
    ``prefix_len`` characters of the cadastre number.

    Args:
        main_df: Frame to complete; must contain an ``original_index`` column,
            ``kadastrs_col_source`` and both target columns.
        df7_lookup: Address frame with ``kadastrs``, ``pilseta`` and ``novads``
            columns.
        prefix_len: Number of leading characters used as the join key.
        kadastrs_col_source: Column of ``main_df`` holding the cadastre number.
        target_pilseta_col: Town column of ``main_df`` to fill.
        target_novads_col: County column of ``main_df`` to fill.

    Returns:
        ``main_df`` itself when no row needs the fallback; otherwise a frame
        with a fresh 0..n-1 index that also carries the helper column
        ``kad_prefix_<prefix_len>``.

    Note:
        Both ``main_df`` and ``df7_lookup`` are modified in place: the helper
        prefix column is added to each of them.
    """
    # Only rows where neither a town nor a county is known take part.
    fallback_mask = (main_df[target_pilseta_col].apply(is_invalid)) & (main_df[target_novads_col].apply(is_invalid))
    if not fallback_mask.any(): return main_df
    prefix_col_name = f'kad_prefix_{prefix_len}'
    main_df[prefix_col_name] = main_df[kadastrs_col_source].astype(str).str[:prefix_len]
    df_to_fallback = main_df.loc[fallback_mask, ['original_index', prefix_col_name]].copy()
    df7_lookup[prefix_col_name] = df7_lookup['kadastrs'].astype(str).str[:prefix_len]
    # Keep lookup rows whose prefix has the full length, and only the first row
    # per prefix, so the left merge below cannot multiply rows.
    df7_for_fallback = df7_lookup.loc[df7_lookup[prefix_col_name].astype(str).str.len() == prefix_len, [prefix_col_name, 'pilseta', 'novads']].drop_duplicates(subset=[prefix_col_name])
    fallback_merged = pd.merge(df_to_fallback, df7_for_fallback, on=prefix_col_name, how='left')
    # Unmatched prefixes come back as NaN; '' is treated as empty downstream.
    fallback_merged['pilseta_fb'] = fallback_merged['pilseta'].fillna('')
    fallback_merged['novads_fb'] = fallback_merged['novads'].fillna('')
    fallback_merged['fb_final_pilseta'], fallback_merged['fb_final_novads'] = apply_location_priority(fallback_merged, 'pilseta_fb', 'novads_fb')
    update_data = fallback_merged.set_index('original_index')[['fb_final_pilseta', 'fb_final_novads']].rename(columns={'fb_final_pilseta': target_pilseta_col, 'fb_final_novads': target_novads_col})
    # Write the fallback values back by 'original_index'.
    # NOTE(review): this presumably requires 'original_index' to be unique in
    # main_df — confirm against the caller.
    main_df_indexed = main_df.set_index('original_index', drop=False)
    main_df_indexed.update(update_data)
    return main_df_indexed.reset_index(drop=True)

def parse_population_data(raw_data):
    """Parse tab-separated ``name<TAB>count`` lines into a dict.

    Blank lines, lines that do not consist of exactly two tab-separated
    fields and lines whose count is not an integer are skipped. Spaces inside
    the count (including characters that NFKD-normalise to a space) are
    removed before conversion.

    Args:
        raw_data: Multi-line text with one record per line.

    Returns:
        Mapping of the stripped place name to its population as ``int``.
    """
    population_dict = {}
    for line in raw_data.strip().split('\n'):
        if not line.strip():
            continue
        fields = line.split('\t')
        if len(fields) != 2:
            continue
        location, raw_count = fields
        count_str = unicodedata.normalize("NFKD", raw_count).replace(' ', '').strip()
        # Only a malformed number is an expected failure here; anything else
        # is a programming error and must not be hidden.
        try:
            population_dict[location.strip()] = int(count_str)
        except ValueError:
            continue
    return population_dict

def calculate_virtual_population(row):
    """Estimate the number of occupants of a premise from its code and area.

    Args:
        row: Mapping-like object with the keys ``'kods'`` and ``'platiba'``.

    Returns:
        An ``int`` occupant estimate (1..7 for codes 1110/1121, 1..6 for code
        1122), or ``pd.NA`` when a value is missing or not numeric, the area
        is not positive, or the code is not one of the three handled codes.
    """
    kods, platiba = row['kods'], row['platiba']
    if pd.isna(kods) or pd.isna(platiba):
        return pd.NA
    # Convert before comparing: an area that arrives as text would otherwise
    # raise TypeError on the "<= 0" comparison instead of yielding pd.NA.
    try:
        area, code = float(platiba), int(kods)
    except (ValueError, TypeError):
        return pd.NA
    if not area > 0:
        return pd.NA

    # first_limit is an exclusive bound for one occupant; every following
    # limit is an inclusive upper bound for one more occupant.
    if code in (1110, 1121):
        first_limit, upper_limits = 30, (70, 250, 400, 600, 800)
    elif code == 1122:
        first_limit, upper_limits = 25, (45, 100, 200, 300)
    else:
        return pd.NA

    if area < first_limit:
        return 1
    for occupants, limit in enumerate(upper_limits, start=2):
        if area <= limit:
            return occupants
    return len(upper_limits) + 2

def assign_real_population(group, real_pop_total):
    """Distribute a place's real population over its premises.

    Every premise with a positive ``virtuali_cilveki`` receives a share of
    ``real_pop_total`` proportional to that value: the share is floored, each
    premise gets at least 1, and any persons still unassigned are handed out
    one by one in order of decreasing fractional part. Finally each premise is
    capped at 7 (codes 1110/1121) or 6 (code 1122), so the returned total may
    be lower than ``real_pop_total``.

    Args:
        group: Frame with the columns ``virtuali_cilveki`` and ``kods``.
            Index labels are assumed to be unique.
        real_pop_total: Population of the place; missing or non-positive
            values produce an all-NA result.

    Returns:
        ``Int64`` Series indexed like ``group``; rows that took no part in the
        distribution are NA.
    """
    valid_group = group.dropna(subset=['virtuali_cilveki'])
    valid_group = valid_group[valid_group['virtuali_cilveki'] > 0].copy()
    output_pop = pd.Series(index=group.index, dtype='Int64')
    if pd.isna(real_pop_total) or real_pop_total <= 0 or valid_group.empty:
        return output_pop
    virtual_pop_sum = valid_group['virtuali_cilveki'].sum()
    if virtual_pop_sum <= 0:
        return output_pop

    scaling_factor = real_pop_total / virtual_pop_sum
    initial_real_pop = valid_group['virtuali_cilveki'] * scaling_factor
    floored_pop = np.floor(initial_real_pop)
    allocated_pop = floored_pop.astype(int).clip(lower=1)

    remainder = int(real_pop_total - allocated_pop.sum())
    if remainder > 0:
        # Round-robin over the premises ordered by fractional part, done in
        # bulk: every premise gets `full_rounds`, the first `extra` get one
        # more. Same result as incrementing one person at a time, without a
        # Python-level loop over every leftover person.
        priority_order = (initial_real_pop - floored_pop).sort_values(ascending=False).index
        full_rounds, extra = divmod(remainder, len(priority_order))
        if full_rounds:
            allocated_pop += full_rounds
        if extra:
            allocated_pop.loc[priority_order[:extra]] += 1

    max_limits = valid_group['kods'].map({1110: 7, 1121: 7, 1122: 6}).fillna(np.inf)
    allocated_pop = allocated_pop.clip(upper=max_limits)
    output_pop.update(allocated_pop)
    return output_pop.astype('Int64')

def modify_kadastra_telpa_key(kad_telpa):
    """Build the alternative lookup key by dropping characters 12-14 of the key.

    Args:
        kad_telpa: Cadastre identifier of a premise group.

    Returns:
        The first 11 characters joined with everything from the 15th
        character on, or ``pd.NA`` when ``kad_telpa`` is not a string of at
        least 17 characters.
    """
    if not isinstance(kad_telpa, str) or len(kad_telpa) < 17:
        return pd.NA
    # Slicing never raises IndexError, so no exception handling is needed.
    return kad_telpa[:11] + kad_telpa[14:]

def weighted_quantile(values, quantiles, sample_weight):
    """Compute weighted quantiles of ``values``.

    Observations whose value or weight is missing, and observations with a
    non-positive weight, are ignored. The remaining values are sorted, their
    weights are accumulated and normalised to (0, 1], and the requested
    quantiles are linearly interpolated on that curve.

    Args:
        values: Sequence or Series of numbers; may contain NaN / ``pd.NA``.
        quantiles: Sequence of quantile positions in [0, 1].
        sample_weight: Weights aligned positionally with ``values``.

    Returns:
        NumPy array with one value per requested quantile; all NaN when no
        usable observation remains.
    """
    def _as_float_array(data):
        # np.array() on a nullable pandas Series holding pd.NA yields an
        # object array on which np.isnan raises TypeError; coerce every
        # missing marker to a float NaN instead.
        return pd.to_numeric(pd.Series(data), errors='coerce').to_numpy(dtype='float64', na_value=np.nan)

    values = _as_float_array(values)
    sample_weight = _as_float_array(sample_weight)
    no_result = np.full(len(quantiles), np.nan)

    # Keep only observations with a known value and a positive weight.
    usable = ~(np.isnan(values) | np.isnan(sample_weight)) & (sample_weight > 0)
    if not usable.any():
        return no_result
    values, sample_weight = values[usable], sample_weight[usable]

    sorter = np.argsort(values)
    values_sorted, weights_sorted = values[sorter], sample_weight[sorter]

    total_weight = np.sum(weights_sorted)
    if total_weight <= 0:
        return no_result

    normalized_cumsum = np.cumsum(weights_sorted) / total_weight
    return np.interp(quantiles, normalized_cumsum, values_sorted)

def calculate_adjustment_factor(gads, kods):
    """Return the relative value adjustment for a construction year and code.

    The year contributes -0.10 (1950-1969), -0.05 (1970-1989), +0.05
    (2000-2014) or +0.10 (2015-2030); the code contributes +0.05 (1110, 1121)
    or -0.05 (1122). A missing, non-numeric or unlisted input contributes
    0.0. The two contributions are added.
    """
    def _to_int(raw):
        # None signals "no usable number".
        if not pd.notna(raw):
            return None
        try:
            return int(raw)
        except (ValueError, TypeError):
            return None

    year_bands = (
        (1950, 1969, -0.10),
        (1970, 1989, -0.05),
        (2000, 2014, 0.05),
        (2015, 2030, 0.10),
    )
    code_adjustments = {1110: 0.05, 1121: 0.05, 1122: -0.05}

    year_adjustment = 0.0
    year = _to_int(gads)
    if year is not None:
        for first_year, last_year, delta in year_bands:
            if first_year <= year <= last_year:
                year_adjustment = delta
                break

    code_adjustment = 0.0
    code = _to_int(kods)
    if code is not None:
        code_adjustment = code_adjustments.get(code, 0.0)

    return year_adjustment + code_adjustment

def assign_level(value, thresholds):
    """Map ``value`` to a level letter using four ascending thresholds.

    ``thresholds`` must unpack into exactly four numbers (q35, q55, q75,
    q90). The value gets the letter of the first known threshold it lies
    below ('E', 'D', 'C', 'B' in that order); if it lies below none of them
    it gets 'A', provided q90 is known. A NaN threshold is skipped.

    Returns:
        One of 'A'..'E', or ``None`` when ``value`` is missing or no level
        can be determined.
    """
    if pd.isna(value):
        return None
    q35, q55, q75, q90 = thresholds
    for limit, label in ((q35, 'E'), (q55, 'D'), (q75, 'C'), (q90, 'B')):
        if pd.notna(limit) and value < limit:
            return label
    return 'A' if pd.notna(q90) else None

def score_to_level(score):
    """Convert an average numeric score back into a level letter.

    Scores of at least 4.5 / 3.5 / 2.5 / 1.5 map to 'A' / 'B' / 'C' / 'D';
    anything lower maps to 'E'. A missing score yields ``pd.NA``.
    """
    if pd.isna(score):
        return pd.NA
    for minimum, label in ((4.5, 'A'), (3.5, 'B'), (2.5, 'C'), (1.5, 'D')):
        if score >= minimum:
            return label
    return 'E'

# ==============================================================================
# --- 3. GALVENAIS PROCESA BLOKS ---
# ==============================================================================
def main():
    """Run the whole pipeline: download the source data, then analyse it.

    Part 1 runs the four download tasks in order, each writing one CSV file;
    the first failure prints an error and ends the function.
    Part 2 reads those CSV files and, in six steps, determines a location,
    an occupant count, a price and a construction year for every premise
    group with a target code, assigns each one a level A-E, and aggregates
    the levels per building into ``final_output_file``.
    Any exception in part 2 is printed with its traceback and not re-raised;
    the total run time is printed in every case once part 2 has started.
    """
    start_time_full = time.time()

    # --- PART 1: DATA ACQUISITION AND PREPARATION ---
    print("==========================================================")
    print("=== 1. DAĻA: SĀKTA DATU IEGŪŠANA NO DATA.GOV.LV ===")
    print("==========================================================")

    # One entry per dataset: where to download it, which CSV to write and
    # which function does the work.
    tasks_download = [
        {"description": "Būvju dati", "url": "https://data.gov.lv/dati/dataset/be841486-4af9-4d38-aa14-6502a2ddb517/resource/9fe29b57-07cd-4458-b22c-b0b9f2bc8915/download/building.zip", "output": file_5mega, "function": process_buildings},
        {"description": "Telpu grupu dati", "url": "https://data.gov.lv/dati/dataset/be841486-4af9-4d38-aa14-6502a2ddb517/resource/5d8b1cfa-1e67-4b77-a6ac-b4e37eba0d7e/download/premisegroup.zip", "output": file_6mega, "function": process_premise_groups},
        {"description": "Adrešu dati", "url": "https://data.gov.lv/dati/dataset/be841486-4af9-4d38-aa14-6502a2ddb517/resource/2aeea249-6948-4713-92c2-e01543ea0f33/download/address.zip", "output": file_7mega, "function": process_addresses},
        {"description": "Vērtību dati", "url": "https://data.gov.lv/dati/dataset/be841486-4af9-4d38-aa14-6502a2ddb517/resource/35a2dbfa-e4b9-41d5-88d0-e1393115dcb1/download/valuation.zip", "output": file_10mega, "function": process_valuations}
    ]

    for task in tasks_download:
        print(f"\n--- SĀKAS POSMS: {task['description']} ---")
        try:
            task["function"](task["url"], task["output"])
            print(f"✓ Posms '{task['description']}' PABEIGTS VEIKSMĪGI. Fails '{task['output']}' ir izveidots.")
        except Exception as e:
            # Part 2 needs all four files, so the first failure ends the run.
            print(f"✗ KRITISKA KĻŪDA posmā '{task['description']}': {e}")
            print("Skripta darbība tiek pārtraukta, jo nav iespējams iegūt nepieciešamos datus.")
            return

    print("\n==========================================================")
    print("=== ✓ 1. DAĻA: DATU IEGŪŠANA PABEIGTA ===")
    print("==========================================================")

    # --- PART 2: DATA ANALYSIS AND CREATION OF THE FINAL RESULT ---
    print("\n==========================================================")
    print("=== 2. DAĻA: SĀKTA IEGŪTO DATU ANALĪZE ===")
    print("==========================================================")

    try:
        # Step 1: determine the location (town or county) of every premise group.
        print("\n[1/6] 📍 Sāk atrašanās vietu noteikšanu...")
        df_properties = pd.read_csv(file_6mega, dtype={'kadastrs': str, 'kadastra_telpa': str, 'kods': 'Int64'}, usecols=['kadastrs', 'kadastra_telpa', 'kods', 'platiba'])
        df_properties = df_properties[df_properties['kods'].isin(target_kods)].copy()
        # Remember the row identity; perform_fallback_merge joins back on it.
        df_properties['original_index'] = df_properties.index
        df_locations_raw = pd.read_csv(file_7mega, dtype={'kadastrs': str, 'pilseta': str, 'novads': str}, usecols=['kadastrs', 'pilseta', 'novads'])
        df_locations_raw['pilseta'] = df_locations_raw['pilseta'].fillna('')
        df_locations_raw['novads'] = df_locations_raw['novads'].fillna('')
        df_locations_raw = df_locations_raw[df_locations_raw['kadastrs'].str.strip() != '']
        # Exact match of the premise group's own cadastre number first.
        # NOTE(review): if the address file holds several rows for one
        # 'kadastrs', this left merge presumably duplicates property rows —
        # confirm against the data.
        df_merged = pd.merge(df_properties, df_locations_raw, left_on='kadastra_telpa', right_on='kadastrs', how='left', suffixes=('_prop', '_loc'))
        df_merged['final_pilseta'], df_merged['final_novads'] = apply_location_priority(df_merged, 'pilseta', 'novads')
        # Then fall back to an 8-character and finally a 7-character prefix of
        # the parent cadastre number. Copies are passed because the helper
        # adds a column to the lookup frame.
        df_merged = perform_fallback_merge(df_merged, df_locations_raw.copy(), 8, 'kadastrs_prop', 'final_pilseta', 'final_novads')
        df_merged = perform_fallback_merge(df_merged, df_locations_raw.copy(), 7, 'kadastrs_prop', 'final_pilseta', 'final_novads')
        df_analysis = df_merged[['kadastrs_prop', 'kadastra_telpa', 'kods', 'platiba', 'final_pilseta', 'final_novads']].copy()
        df_analysis.rename(columns={
            'kadastrs_prop': 'kadastrs',
            'final_pilseta': 'pilseta',
            'final_novads': 'novads'
        }, inplace=True)
        df_analysis['pilseta'] = df_analysis['pilseta'].replace('', '-')
        df_analysis['novads'] = df_analysis['novads'].replace('', '-')
        print(f"    Rezultāts: {len(df_analysis)} īpašumi ar noteiktām atrašanās vietām.")

        # Step 2: estimate occupants and scale them to the real population.
        print("\n[2/6] 👨‍👩‍👧‍👦 Sāk iedzīvotāju skaita aprēķināšanu...")
        df_analysis = df_analysis.replace('-', pd.NA)
        df_analysis['virtuali_cilveki'] = df_analysis.apply(calculate_virtual_population, axis=1).astype('Int64')
        population_map = parse_population_data(population_data_raw)
        # 'vieta' is the town when known, else the county; a trailing " nov."
        # is expanded to " novads" to match the names in population_map.
        df_analysis['vieta'] = df_analysis['pilseta'].combine_first(df_analysis['novads']).str.strip().str.replace(r'\s+nov\.$', ' novads', regex=True)
        # The lambda receives one 'vieta' group; g.name is the place name and
        # g.index selects the full rows that assign_real_population needs.
        df_analysis['reali_cilveki'] = df_analysis.groupby('vieta')['virtuali_cilveki'].transform(lambda g: assign_real_population(df_analysis.loc[g.index], population_map.get(g.name)))
        df_analysis['reali_cilveki'] = df_analysis['reali_cilveki'].astype('Int64')
        print(f"    Rezultāts: {df_analysis['reali_cilveki'].notna().sum()} īpašumiem piešķirts reālais iedzīvotāju skaits.")

        # Step 3: attach the 'univ_cena' price.
        print("\n[3/6] 💶 Sāk cenas datu pievienošanu...")
        df_cenas = pd.read_csv(file_10mega, usecols=['kadastrs', 'univ_cena'], dtype={'kadastrs': str})
        df_cenas['univ_cena'] = pd.to_numeric(df_cenas['univ_cena'], errors='coerce')
        df_cenas_unique = df_cenas.dropna(subset=['univ_cena']).drop_duplicates(subset=['kadastrs']).set_index('kadastrs')
        df_analysis['univ_cena'] = df_analysis['kadastra_telpa'].map(df_cenas_unique['univ_cena'])
        # Rows without a direct match are retried with the shortened key from
        # modify_kadastra_telpa_key.
        missing_mask = df_analysis['univ_cena'].isna()
        if missing_mask.any():
            mod_keys = df_analysis.loc[missing_mask, 'kadastra_telpa'].apply(modify_kadastra_telpa_key)
            df_analysis.loc[missing_mask, 'univ_cena'] = mod_keys.map(df_cenas_unique['univ_cena'])
        print(f"    Rezultāts: {df_analysis['univ_cena'].notna().sum()} īpašumiem pievienota cena.")

        # Step 4: attach the construction year (optional; failures are only warned about).
        print("\n[4/6] 📅 Sāk būvniecības gada pievienošanu...")
        try:
            df_gadi_cols = list(pd.read_csv(file_5mega, nrows=0).columns)
            # NOTE(review): 'karastra_telpa' looks like a misspelled legacy
            # header kept as a fallback — confirm it is still needed.
            gadi_building_id_col = 'kadastra_telpa' if 'kadastra_telpa' in df_gadi_cols else 'karastra_telpa'
            df_gadi = pd.read_csv(file_5mega, usecols=['kadastrs', gadi_building_id_col, 'gads'], dtype={gadi_building_id_col: str, 'kadastrs': str})
            df_gadi['gads'] = pd.to_numeric(df_gadi['gads'], errors='coerce').astype('Int64')

            # Two lookups: by the building's own number and by its parent object.
            lookup_gads1 = df_gadi.dropna(subset=['gads']).drop_duplicates(subset=[gadi_building_id_col]).set_index(gadi_building_id_col)['gads']
            lookup_gads2 = df_gadi.dropna(subset=['gads']).drop_duplicates(subset=['kadastrs']).set_index('kadastrs')['gads']

            # The premise group's 'kadastrs' is matched against the building id column.
            df_analysis['gads'] = df_analysis['kadastrs'].map(lookup_gads1)

            # Fallback: first 11 characters of 'kadastrs' against the parent-object lookup.
            missing_gads_mask = df_analysis['gads'].isna()
            if missing_gads_mask.any():
                fallback_keys = df_analysis.loc[missing_gads_mask, 'kadastrs'].str[:11]
                gadi_fallback = fallback_keys.map(lookup_gads2)
                df_analysis.loc[missing_gads_mask, 'gads'] = gadi_fallback
        except Exception as e:
            print(f"    Brīdinājums: Neizdevās pievienot gadus: {e}. Turpinām bez gada datiem.")
            df_analysis['gads'] = pd.NA

        df_analysis['gads'] = df_analysis['gads'].astype('Int64')
        print(f"    Rezultāts: {df_analysis['gads'].notna().sum()} īpašumiem pievienots būvniecības gads.")

        # Step 5: assign an individual level (A-E) to every premise group.
        print("\n[5/6] 📊 Sāk individuālo līmeņu (A-E) piešķiršanu...")
        df_analysis.dropna(subset=['univ_cena'], inplace=True)
        if df_analysis.empty: raise ValueError("Pēc rindu ar tukšu 'univ_cena' dzēšanas nepalika datu.")
        # A zero occupant count is turned into NaN to avoid dividing by zero.
        df_analysis['value_per_person'] = df_analysis['univ_cena'] / df_analysis['reali_cilveki'].replace(0, np.nan)
        df_analysis['adj_factor'] = df_analysis.apply(lambda row: calculate_adjustment_factor(row['gads'], row['kods']), axis=1)
        df_analysis['adj_value'] = df_analysis['value_per_person'] * (1 + df_analysis['adj_factor'])

        # Helper applied to each 'vieta' group by the transform below.
        # NOTE(review): thresholds come from the unadjusted 'value_per_person'
        # while the levels are assigned from 'adj_value' — confirm this
        # asymmetry is intended.
        def calculate_levels_for_group(g):
            group_data = df_analysis.loc[g.index]
            thresholds = weighted_quantile(
                group_data['value_per_person'], 
                [0.35, 0.55, 0.75, 0.9], 
                group_data['reali_cilveki']
            )
            return g.apply(assign_level, thresholds=thresholds)

        df_analysis['level'] = df_analysis.groupby('vieta')['adj_value'].transform(calculate_levels_for_group)
        print(f"    Rezultāts: {df_analysis['level'].notna().sum()} īpašumiem piešķirts līmenis.")

        # Step 6: aggregate per building as an occupant-weighted average score.
        print("\n[6/6] 🏢 Sāk ēku kopējo līmeņu aprēķināšanu...")
        df_final = df_analysis[['kadastrs', 'reali_cilveki', 'level']].copy()
        df_final.dropna(subset=['reali_cilveki', 'level'], inplace=True)
        df_final = df_final[df_final['reali_cilveki'] > 0]
        df_final['level_score'] = df_final['level'].map(level_to_score_map).astype('Int64')
        df_final['weighted_score'] = df_final['level_score'] * df_final['reali_cilveki']
        if df_final.empty: raise ValueError("Pēc datu tīrīšanas nav palikušas derīgas rindas līmeņu apkopošanai.")
        building_summary = df_final.groupby('kadastrs').agg(kopejie_cilveki=('reali_cilveki', 'sum'), kopejais_svars_score=('weighted_score', 'sum'))
        building_summary['avg_rating'] = building_summary['kopejais_svars_score'] / building_summary['kopejie_cilveki']
        building_summary['level'] = building_summary['avg_rating'].apply(score_to_level)
        final_output_df = building_summary.reset_index()[['kadastrs', 'kopejie_cilveki', 'level']]

        final_output_df.to_csv(final_output_file, index=False, sep=',', na_rep='')
        print("\n==========================================================")
        print("=== ✓ 2. DAĻA: DATU ANALĪZE PABEIGTA ===")
        print("==========================================================")
        print(f"\n✅ GALA REZULTĀTS VEIKSMĪGI SAGLABĀTS failā: '{final_output_file}'")
        print(f"   Kopā apstrādātas un saglabātas {len(final_output_df)} ēkas.")

    except Exception as e:
        # Report the failure with its traceback; the exception is not re-raised.
        print(f"\n❌ KRITISKA KĻŪDA ANALĪZES DAĻĀ: {e}", file=sys.stderr)
        import traceback
        traceback.print_exc()
    finally:
        end_time_full = time.time()
        print(f"\n⏱️  KOPĒJAIS SKRIPTA IZPILDES LAIKS: {end_time_full - start_time_full:.2f} sekundes.")

if __name__ == "__main__":
    # Script entry point: prepare multiprocessing for frozen executables,
    # report the number of CPU cores, then run the pipeline.
    multiprocessing.freeze_support()
    print(f"Dators ir gatavs izmantot līdz {multiprocessing.cpu_count()} kodoliem katra arhīva iekšējai apstrādei.")
    main()